import re,requests,json,time
from requests.exceptions import RequestException



def getPage(url):
	'''Fetch the page at *url* and return its HTML text.

	Returns the response body on HTTP 200, and None on any non-200
	status or network error. (The original returned str(e) on a
	RequestException, an error string callers could not tell apart
	from real page HTML and would then try to regex-parse.)
	'''
	headers = {
		"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
	}
	try:
		# timeout so a stalled server cannot hang the whole crawl
		res = requests.get(url, headers=headers, timeout=10)
	except RequestException:
		return None
	if res.status_code == 200:
		return res.text
	return None

# Parse a listing page with one big regular expression.
def parsePage(content):
	'''Parse one 58.com listing page and yield one dict per rental ad.

	content: page HTML from getPage(), or None/'' when the fetch
	failed — in that case nothing is yielded instead of crashing
	inside re.findall.

	Yields dicts with keys: images, url, title, roomType, address,
	roomName, price (price text is suffixed with '元/月').
	'''
	if not content:
		return
	# DOTALL pattern over the listing markup; the seven groups map, in
	# order, onto the fields of the yielded dict.
	pattern = re.compile(
		'<div class="img_list">.*?<img.*?src="(.*?)".*?>.*?</div>.*?'
		'<div class="des">.*?<h2>.*?<a href="(.*?)".*?>(.*?)</a>.*?</h2>.*?'
		'<p class="room">(.*?)</p>.*?'
		'<p class="add">.*?<a.*?>(.*?)</a>.*?<a.*?>(.*?)</a>.*?</p>.*?'
		'<div class="money">.*?<b>(.*?)</b>.*?</div>.*?</div>',
		re.S)
	for item in pattern.findall(content):
		yield {
			'images': item[0].strip(),
			'url': item[1].strip(),
			'title': item[2].strip(),
			'roomType': item[3].strip(),
			'address': item[4].strip(),
			'roomName': item[5].strip(),
			# strip() for consistency with every other field
			'price': item[6].strip() + '元/月',
		}
# Persist one already-serialized record to the output file.
def writeData(content):
	'''Append *content* (a pre-serialized string) to ./data.json.'''
	# 'a' mode accumulates records across calls; the with-statement
	# closes the file on exit, so the original's explicit f.close()
	# inside the block was redundant and has been removed.
	with open('./data.json', 'a', encoding="utf-8") as f:
		f.write(content)

def getRoomData(url):
	'''Fetch one listing page and append every parsed ad to data.json.

	Each record is written as one JSON object followed by ',\n'
	(same output format as before).
	'''
	content = getPage(url)
	# getPage() may return None on failure; skip this page instead of
	# letting parsePage crash on a non-string argument.
	if content is None:
		return
	for record in parsePage(content):
		writeData(json.dumps(record, ensure_ascii=False) + ',\n')

if __name__ == '__main__':
	# Crawl the user-requested number of listing pages, pausing two
	# seconds between pages; results accumulate in ./data.json.
	last_page = int(input('输入爬取总页数：')) + 1
	for page in range(1, last_page):
		print('***正在爬取第' + str(page) + '页***\n')
		page_url = 'http://nj.58.com/chuzu/pn' + str(page) + '/?ClickID=2'
		getRoomData(page_url)
		time.sleep(2)
	print('*' * 12, '爬取结束，请查看目录下data.json文件', '*' * 12)

